Run this section once at the beginning of each R session.
library(dplyr)
library(Seurat)
library(patchwork)
library(RColorBrewer)
library(EnhancedVolcano)
custom_color_palette <- c("#1B9E77", "#D95F02", "#7570B3", "#CA00C4",
"#66A61E", "#E6AB02", "#A6761D", "#666666", "#194D33")
LN_Trm_genes <- c("Acap1", "Actn2", "Amica1", "Arhgef1", "Atxn7l3b", "Aw112010",
"B4galnt1", "Bcl11b", "Cbx3", "Ccnd2", "Ccr10", "Cd27", "Cd7",
"Cd74", "Chd3", "Cirbp", "Clec2d", "Crot", "Csf1", "Cxcr3",
"Cxcr6", "Eif5", "Evl", "Fam189b", "Fubp1", "Fyb", "Gramd1a",
"Sema4a","Gstp1", "Shisa5", "H2-T23", "Sipa1", "Hmgn1",
"Slfn2", "Hmha1", "Sp100", "Hsp90b1", "Spcs2", "Id2", "Srrm2",
"Ifitm10", "Stap1", "Ikzf3", "Tbc1d10c", "Il16", "Tesc",
"Il18r1", "Tnfaip8", "Il7r", "Tnrc6a", "Irf2bpl", "Tsc22d4",
"Itgae", "Uba52", "Itgal", "Ucp2", "Itm2c", "Wbp1", "Lfng",
"Xist", "Lpar6", "Ypel3", "Lrrc58", "Zbtb7a", "Ltb", "Znrf1",
"Ly6a", "Ly6e", "Ly6g5b", "Malat1", "Mbnl1", "Mrpl52", "Mxd4",
"Mycbp2", "N4bp2l2", "N4bp2l2", "Ndufa3", "Ndufa5", "Nktr",
"Nudcd3", "Ogt", "Pdcd4", "Pdia3", "Pdia6", "Ptpn7", "Ptprc",
"Rapgef6", "Rbpj", "Rgs10", "Rpl15", "Rpl35", "Rpl38",
"Rps28", "Rps29", "Sash3")
TGFb_genes <- c("Mcpt2","Cdh1","Spsb1","Vipr2","Gpr56","Src","Ppp2r2c","Lrig1",
"Itgae","Agap1","Ncmap","Pmepa1","Sema6d","Emid1","Cd33","Dlk1",
"Ldlrad4","Car2","Cpd","Nt5e","Tspan9","Gsg2","Klhl30",
"1810011H11Rik","Osgin1","Ccl1","Litaf","Itga1","Kifc3",
"Hsf2bp","Asic3","Abi3","Smurf2","Phactr2","Oplah","Qpct",
"Tfr2","Isg20","Rnase6","Rgs1","2900026A02Rik","Mmp11",
"Tnfsf11","Nrarp","Cyb561","Smyd1","Kcnip2","Cx3cr1","Nek6",
"Nlrp1b","St8sia1","Arhgap39","Jup","Htra3","Rgs16","H2-M5",
"Chn2","Cish","Atp6v0a1","Skil","Dok3","Igflr1","Ccr8","Timp2",
"Zfyve28","Ppm1n","Hpgds","B4galnt4","Ifng","Ctnnal1","Clec12a",
"Exoc3l","Coro2a","Ikzf4","Adamts6","D8Ertd82e","Smpd5","Aqp3",
"Evpl","Ramp1","St8sia6","Xcl1","Scn1b","Rnf149","Dtx4","Gngt2",
"Sbk1","Tbc1d16","Tnfrsf13c","Gna12","Ermn","Neu3","Fmnl3",
"Cd83","Epb4.1l2","Ccdc112","Adam19","Rab26","Fam101b","Mical3",
"Prkcz","Grina","Slc27a6","Tgfbr3","Fgfr1","Msc","Rgs10",
"Lonrf1","Lax1","Kcnc1","Nphp1","Slc16a10","Kif13a","Ninj1",
"Smyd3","9430020K01Rik","Csgalnact1","Gpaa1","Ski","Gcnt4",
"Map9","Egr3","Fam161a","Egr1","Fndc3a","Mapkapk3","Ctss",
"Hnrnpll","Galm","Dusp2","Stom","Esm1","1700049G17Rik",
"Plekho1","Med10","Smtn","Gpr34","Sepn1","Egr2","Prrt2","Aen",
"Cd101","Gtf2ird1","Tiam1","Camkk1","D430042O09Rik","Fam214b",
"Matk","Ralgps1","Dapk2","Usp6nl","Foxred2","Wdyhv1","Znrf1",
"Tjp1","Irf8","Hemk1","Pgap1","Accs","Aim2","Per3","Zfr2",
"Lgalsl","1700001L05Rik","Zfp820","D3Ertd254e","Gcnt1",
"Slc41a2","Ttc39b","Gclm","Peg13","Slc9a1","Adora3","Cers6",
"Ccrn4l","Cd96","Golim4","Lpcat2","Lsr","Acsbg1","Eef2k",
"Plekhf1","Rbm20","Ssx2ip","Ankrd50","Igfbp4","Inpp4b",
"Irf2bpl","Pygl","Zfp1","Golm1","Gpr68","Ptgfrn","Tsc22d1",
"Abca1","Fam124b","Itpripl2","Bcl6","Lysmd2","Trp53inp2",
"Zdhhc13","Bpgm","F2r","Frmd4b","Ctsw","Swap70","Frmd6","Gas7",
"Gdpd5","Spire1","Tet3","Batf3","Dstyk","Luzp1","Mgat5","Ptpre",
"Ralgps2","Mif4gd","Stat1","Ttc3","Abhd15","Cerk","Adssl1",
"Pcyt1b","Rai1","Blcap","Map3k14","Rnf19b","Scai","Tmem57",
"Atp6v1g2","Chst12","Fam20a","Gtf3c1","Trp53inp1","Wdr78",
"Aars2","Cd244","Ly6g5b","Tbx6","Usp22","Zfp827",
"1600014C10Rik","Als2","Arhgef5","B4galt5","Nfat5","Prkacb",
"Rgs2","Slc9a3r1","Soat1","Tctn3","Ttc39c","Cotl1","Ldlrap1",
"Ncf1","Iigp1","Ikzf3","Ipcef1","Irf4","Abi2","Runx3","Ypel3",
"Entpd1","Fut8","Inpp5f","Apol7e","Arhgef12","Nrp1","Slc26a11",
"Tnfrsf1b","Cd160","Gfod1","Gm12185","H6pd","Pmm1","Tmem2",
"Ublcp1","Dennd3","Gramd1a","Idh2","Ppip5k1","Slc39a13",
"Baiap3","Extl3","Mxd1","Nipal1","Rrp1b","Twsg1","Cdc42bpg",
"Celsr1","Ehd1","Kit","Slc22a15","Tmcc1","Camsap2","Klhl25",
"Ncf4","Plcxd2","Rab11fip4","Specc1","Fam3c","Fuca2","Pde4a",
"Prr12","Ctnnb1","Egln3","Fam46a","Fbxo25","Gprin3","Scly",
"AW112010","Cd1d1","Lrrc61","Clstn1","Exosc4","Smad7","Susd3",
"Traf4","Vasp","Gne","Gpbp1l1","Prkch","Rab37","Rbpj","Usp11",
"AA467197","Bmpr2","Cd8a","Dpp9","Inpp5d","Kif1b","Lasp1",
"Rftn1","Wee1","Fasl","Nbas","Plscr1","Prkdc","Rhoh","Spcs2",
"Suox","Tbc1d4","Tgif1","Anp32a","Lnpep","Myo5a","Rreb1",
"Zfp706","Ermp1","Fam149b","Glrx","Pacsin2","Plekha2","Sorl1",
"Dnajc9","Nbeal1","Plod2","Ssh2","Trappc10","Ercc6","Fchsd2",
"Gfi1","Ubn2","Vps54","Actr1b","Ccni","Cd2bp2","Tnfsf10",
"Acot11","Atad2","Lgals9","Nup153","Gtpbp1")
Create the Seurat Object.
scobj.data <- Read10X(data.dir="LN/filtered/")
scobj <- CreateSeuratObject(counts=scobj.data, project="2022-12-29_LN",
min.cells=3, min.features=200)
Calculate the % mitochondrial and ribosomal genes for each cell.
scobj[["percent.mt"]] <- PercentageFeatureSet(scobj, pattern="^mt-")
scobj[["percent.rps"]] <- PercentageFeatureSet(scobj, pattern="Rps")
scobj[["percent.rpl"]] <- PercentageFeatureSet(scobj, pattern="Rpl")
Visualize the QC features as violin plots.
VlnPlot(scobj, features=c("nFeature_RNA", "nCount_RNA"), ncol=2)
VlnPlot(scobj, features=c("percent.mt", "percent.rps", "percent.rpl"), ncol=3)
Visualize the QC features as FeatureScatter plots.
plot1 <- FeatureScatter(scobj, feature1="nCount_RNA", feature2="percent.mt")
plot2 <- FeatureScatter(scobj, feature1="nCount_RNA", feature2="nFeature_RNA")
plot1 + plot2
Apply filters and normalize data.
scobj <- subset(scobj, subset=(nFeature_RNA > 100 & nFeature_RNA < 6000
& percent.mt < 5 & percent.rps < 20 & percent.rpl < 23))
scobj <- NormalizeData(scobj, normalization.method="LogNormalize",
scale.factor=10000)
scobj <- FindVariableFeatures(scobj, selection.method="vst", nfeatures=2500)
all.genes <- rownames(scobj)
scobj <- ScaleData(scobj, features=all.genes)
## Centering and scaling data matrix
Perform PCA.
scobj <- RunPCA(scobj, features=VariableFeatures(object=scobj))
## PC_ 1
## Positive: Pclaf, Birc5, Stmn1, Mki67, Cdca8, Nusap1, Hist1h1b, Tpx2, Top2a, Ccna2
## Knl1, Spc24, Hmmr, Rrm2, Kif11, Esco2, Cks1b, Neil3, Bub1, Cdk1
## Cdca3, Depdc1a, Hist1h2af, Ube2c, Aurkb, Ccnb2, Hist1h3c, Asf1b, Ckap2l, Mxd3
## Negative: Arhgap15, Ccnd3, Foxp1, Themis, Dock2, Lncpint, Gm2682, Maml2, Zbtb20, Gm26740
## Stat4, Ripor2, Dock10, Slc9a9, Elmo1, Skap1, Smyd3, Ikzf1, Pitpnc1, Inpp4b
## Tcf12, Dennd4a, Fgf13, Tnik, Nedd9, Zswim6, Cblb, Thada, Atxn1, Zeb1
## PC_ 2
## Positive: Dock10, Arhgap15, Dock2, Slc9a9, Themis, Skap1, Elmo1, Gm2682, Maml2, Atp8b4
## Ikzf1, Atxn1, Pitpnc1, Stag1, Foxp1, Tcf12, Lncpint, Ccnd3, Fam172a, Exoc4
## Iqgap2, Zbtb20, Rabgap1l, Lrba, Pag1, Cblb, Slco3a1, Zeb1, Mctp2, Smyd3
## Negative: Rplp0, Rpl41, Rps20, Rps2, Rps12, Rpl28, Actb, Pfn1, Ppia, Cd52
## Nme2, Ccl5, S100a6, Sh3bgrl3, Actg1, S100a10, Hsp90ab1, Gapdh, Rbm3, Crip1
## Eif5a, Emp3, Rps17, Ly6a, Lgals1, Ptma, Vim, Clic1, Prdx1, Ndufa4
## PC_ 3
## Positive: Igkc, Cd79a, Ebf1, Mef2c, Ighd, Bank1, Iglc2, H2-DMb2, Cd74, H2-Ab1
## H2-Eb1, Pax5, Fcmr, H2-Aa, Ms4a1, Ly6d, Iglc3, Blnk, March1, Syk
## Gm31243, Cd79b, Ly86, Fcer2a, Ciita, Wdfy4, Bcl11a, Siglecg, Napsa, Blk
## Negative: Gm2682, Skap1, Themis, Atp8b4, Epsti1, Slc9a9, Atxn1, Iqgap2, Runx2, Ahnak
## Slco3a1, Pitpnc1, Gm44174, Ccl5, S100a6, Fgf13, Itgb1, Cd226, Ust, Ctla2a
## Camk4, Gramd1b, Maml2, Ccr2, Gab3, Sntb1, Mllt3, Dock10, Sidt1, Stat4
## PC_ 4
## Positive: Csf1, Chn2, Gzmb, Gm36723, Osbpl3, Itga1, Atp8a2, AU020206, Hip1, Vav3
## Gstm5, Rbpj, Cish, 2900026A02Rik, Itgae, Tjp1, Rbms3, Inpp4b, Coro2a, Fgl2
## Mmp16, Mdfic, Pip5k1b, Gm42418, Arsb, Filip1, Rgs1, Pde11a, Cd226, St6galnac3
## Negative: Sidt1, Aff3, Sell, Itgb1, Klf3, Nme2, Rplp0, 1700025G04Rik, Rps2, Bach2
## Arhgef18, Rps12, Pde2a, Rps20, Cmah, Actn1, H2afz, Hsp90ab1, Rpl41, Atp1a1
## Rasa3, Ndufa4, Crip1, Nsg2, Elmo1, Tagln2, Lef1, Itga4, Nrp1, Txk
## PC_ 5
## Positive: S100a6, Lgals1, S100a4, S100a10, Sh3bgrl3, Cd52, Crip1, Vim, Lgals3, Capg
## Ly6a, Ahnak, Ifitm2, Ifitm1, Anxa1, Ccr2, Actb, Emp3, Pfn1, Ifitm3
## Gapdh, Ppia, Clic1, Cd48, Rpl41, Ctla2a, AA467197, Gzmb, Reep5, Rps2
## Negative: Apoe, Gm42418, Ifngas1, Lyz2, Dapl1, Nusap1, Tox, Btbd11, Mxd3, Hist1h2af
## Kif14, Aif1, Esco2, C1qa, Anln, Knl1, Mis18bp1, Kif11, Hist1h2ab, Cenpf
## Taco1, Hist1h3c, Klf3, Hist1h1b, Prc1, Bub1b, C1qb, Hist1h2bb, Ccnf, Birc5
Jackstraw and Elbow Plots to visualize dimensionality of dataset.
scobj <- JackStraw(scobj, dims=30, num.replicate=100)
scobj <- ScoreJackStraw(scobj, dims=1:30)
JackStrawPlot(scobj, dims=1:30)
## Warning: Removed 56380 rows containing missing values (`geom_point()`).
ElbowPlot(scobj, ndims=30)
Run clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.
scobj <- FindNeighbors(scobj, dims=1:15)
## Computing nearest neighbor graph
## Computing SNN
scobj <- FindClusters(scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4102
## Number of edges: 141924
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8766
## Number of communities: 9
## Elapsed time: 0 seconds
scobj <- RunUMAP(scobj, dims=1:15)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:08:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:08:06 Read 4102 rows and found 15 numeric columns
## 13:08:06 Using Annoy for neighbor search, n_neighbors = 30
## 13:08:06 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:08:06 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d894ca6680
## 13:08:06 Searching Annoy index using 1 thread, search_k = 3000
## 13:08:08 Annoy recall = 100%
## 13:08:08 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:08:09 Initializing from normalized Laplacian + noise (using irlba)
## 13:08:09 Commencing optimization for 500 epochs, with 169224 positive edges
## 13:08:27 Optimization finished
Show the UMAP and the number of cells in each cluster.
DimPlot(scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)
table(Idents(scobj))
##
## 0 1 2 3 4 5 6 7 8
## 1567 934 596 379 182 168 126 78 72
Find markers for all clusters.
scobj.markers <- FindAllMarkers(scobj, only.pos=TRUE, min.pct=0.25,
logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
Generate heatmap for top 10 genes in all clusters.
scobj.markers %>%
group_by(cluster) %>%
top_n(n=10, wt=avg_log2FC) -> top10
DoHeatmap(scobj, features=top10$gene,
group.colors=custom_color_palette) + NoLegend()
sub1_scobj <- subset(x=scobj, idents=c(4, 7, 8), invert=TRUE)
Determine highly variables genes.
sub1_scobj <- FindVariableFeatures(sub1_scobj, selection.method="vst",
nfeatures=2500)
Rescale data.
sub1_all.genes <- rownames(sub1_scobj)
sub1_scobj <- ScaleData(sub1_scobj, features=sub1_all.genes)
## Centering and scaling data matrix
Re-run PCA.
sub1_scobj <- RunPCA(sub1_scobj, features=VariableFeatures(object=sub1_scobj))
## PC_ 1
## Positive: Pclaf, Birc5, Mki67, Nusap1, Stmn1, Hist1h1b, Top2a, Kif11, Tpx2, Cdca8
## Knl1, Neil3, Esco2, Hmmr, Ccna2, Spc24, Rrm2, Bub1, Hist1h2af, Cdk1
## Depdc1a, Cdca3, Aurkb, Shcbp1, Kif4, Hist1h3c, Cks1b, Ccnb2, Ube2c, Bub1b
## Negative: Ccl5, Ccnd3, Arhgap15, Foxp1, Gm26740, Bcl2, Stat4, Sidt1, Lef1, Gm2682
## Ripor2, Prkcq, Gm42418, Lncpint, Scml4, Peli1, Sell, Camk1d, Zbtb20, Pde7a
## Themis, mt-Co1, Utrn, Junb, Satb1, Bach2, Dock2, Thada, Mbnl1, Maml2
## PC_ 2
## Positive: Rpsa, Rplp0, Rpl41, Rps2, Tmsb4x, Ppia, Rpl28, Rps20, Pfn1, Actb
## Actg1, Nme2, Cd52, Sh3bgrl3, S100a10, Rps12, Gapdh, Rbm3, Hspa8, Emp3
## Crip1, S100a6, Eif5a, Vim, Lgals1, Prdx1, Clic1, Ptma, H2afz, Ly6a
## Negative: Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Ankrd44
## Atp8b4, Grap2, Maml2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1
## Arhgap26, Tcf12, Ccnd3, Zbtb20, Ppm1h, Lncpint, Aak1, Vps13b, Pde7a, Stag1
## PC_ 3
## Positive: Csf1, Gzmb, Chn2, Itga1, Gm36723, AU020206, Cish, Osbpl3, Rbpj, Hip1
## Gstm5, S100a4, Cd52, Atp8a2, Fgl2, Sh3bgrl3, Ly6a, Vav3, Itgae, 2900026A02Rik
## Inpp4b, Tjp1, Mdfic, Havcr2, Rbms3, Cd226, Coro2a, AA467197, Capg, Pik3ap1
## Negative: Sidt1, Aff3, Sell, Rplp0, Klf3, Rps12, Rps2, Rpsa, 1700025G04Rik, Bach2
## Itgb1, Rps20, Pde2a, Nme2, Arhgef18, Ccr7, Cmah, Nsg2, Lef1, Rpl41
## Actn1, Elmo1, Itga4, Txk, Arhgap15, Slamf6, Fgf13, Il6ra, H2afz, Ndufa4
## PC_ 4
## Positive: Lgals3, S100a6, Lgals1, Vim, Ifitm1, Ifitm2, Capg, Crip1, Ahnak, S100a10
## S100a4, Rap1gap2, Dgkh, Itgb1, Itsn1, Runx2, Anxa1, Nebl, Cd48, Ccr2
## Iqgap2, Nrp1, Ifitm3, Reep5, Emp3, Klf12, Fry, Swap70, Rasa3, Ripor2
## Negative: Plac8, Ifi27l2a, Xcl1, Gimap7, Ifngas1, Ighm, Btbd11, Tox, Eomes, Dapl1
## Samd3, Isg15, A330040F15Rik, Ifit1, Prkch, Usp18, Ddx60, Chn2, Setbp1, Abtb2
## Rps20, Rgs1, Oas1a, Rnf213, Cpq, Ifit3, Rtp4, Atp8a2, Ifit3b, Gzmk
## PC_ 5
## Positive: Ube2c, Cenpf, Ccnb1, Aspm, Ccnb2, Kif2c, Prr11, Hmmr, Pif1, Cdkn3
## Pimreg, Cenpa, Ckap2, Ccna2, Cdc20, Cdca3, Cenpe, Arhgap19, Birc5, Gas2l3
## Kif14, Plk1, Mxd3, Cep55, Nusap1, Depdc1a, Nuf2, Kif23, Cdc25c, Aurka
## Negative: Dtl, E2f1, Cdca7, Lig1, Chaf1a, Hells, Cdc6, Mcm5, Uhrf1, Chek1
## Rad51b, Mcm3, Mcm6, Mcm2, Mcm4, Timeless, Mcm7, Pole, Atad5, Pola1
## Brca1, Dhfr, Clspn, Mms22l, Cdc45, Hsp90ab1, Rad51, Ranbp1, Cenps, Fignl1
Re-run Jackstraw and Elbow plots to determine new dimensionality.
sub1_scobj <- JackStraw(sub1_scobj, dims=30, num.replicate=100)
sub1_scobj <- ScoreJackStraw(sub1_scobj, dims=1:30)
JackStrawPlot(sub1_scobj, dims=1:30)
## Warning: Removed 57280 rows containing missing values (`geom_point()`).
ElbowPlot(sub1_scobj, ndims=30)
Rerun clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.
sub1_scobj <- FindNeighbors(sub1_scobj, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
sub1_scobj <- FindClusters(sub1_scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3770
## Number of edges: 146241
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8493
## Number of communities: 6
## Elapsed time: 0 seconds
sub1_scobj <- RunUMAP(sub1_scobj, dims=1:25)
## 13:13:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:13:08 Read 3770 rows and found 25 numeric columns
## 13:13:08 Using Annoy for neighbor search, n_neighbors = 30
## 13:13:08 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:13:09 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d897dc7231
## 13:13:09 Searching Annoy index using 1 thread, search_k = 3000
## 13:13:10 Annoy recall = 100%
## 13:13:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:13:12 Initializing from normalized Laplacian + noise (using irlba)
## 13:13:12 Commencing optimization for 500 epochs, with 166974 positive edges
## 13:13:29 Optimization finished
Show the UMAP and the number of cells in each cluster.
DimPlot(sub1_scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)
table(Idents(sub1_scobj))
##
## 0 1 2 3 4 5
## 1675 868 596 347 180 104
Find markers for all clusters.
sub1_scobj.markers <- FindAllMarkers(sub1_scobj, only.pos=TRUE, min.pct=0.25,
logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
Generate heatmap for top 10 genes in all clusters.
sub1_scobj.markers %>%
group_by(cluster) %>%
top_n(n=10, wt=avg_log2FC) -> sub1_top10
DoHeatmap(sub1_scobj, features=sub1_top10$gene,
group.colors=custom_color_palette) + NoLegend()
Cluster 4 has two top genes (Top2a, Mki67) implicated in cell cycle. This code attempts to regress these signals from the dataset. The result was not used for further analysis, but additional B cells are discovered in the process, which need to be removed from Subset 1.
Set up a copy of sub1_scobj and generate cell cycle gene lists in proper format.
scobj_cc <- sub1_scobj
s.genes <- tolower(cc.genes$s.genes)
for(i in 1:length(s.genes)){
substr(s.genes[i],1,1) <- toupper(substr(s.genes[i], 1, 1))
}
g2m.genes <- tolower(cc.genes$g2m.genes)
for(i in 1:length(g2m.genes)){
substr(g2m.genes[i],1,1) <- toupper(substr(g2m.genes[i], 1, 1))
}
Perform the cell cycle scoring and regression.
scobj_cc <- CellCycleScoring(scobj_cc, s.features=s.genes,
g2m.features=g2m.genes, set.ident=FALSE)
## Warning: The following features are not present in the object: Mlf1ip, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Fam64a, Hn1, not
## searching for symbol synonyms
scobj_cc <- ScaleData(scobj_cc, vars.to.regress=c("S.Score", "G2M.Score"),
features=rownames(scobj_cc))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix
Rerun all analysis from Subset 1 on the regressed dataset.
scobj_cc <- RunPCA(scobj_cc, features=VariableFeatures(object=scobj_cc))
## PC_ 1
## Positive: Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Ankrd44
## Atp8b4, Grap2, Maml2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1
## Arhgap26, Tcf12, Ccnd3, Zbtb20, Lncpint, Ppm1h, Aak1, Vps13b, Stag1, Pde7a
## Negative: Rpsa, Rplp0, Rpl41, Rps2, Ppia, Tmsb4x, Rpl28, Rps20, Pfn1, Actb
## Nme2, Actg1, Cd52, Rps12, Sh3bgrl3, S100a10, Gapdh, Rbm3, Hspa8, Emp3
## Crip1, S100a6, Eif5a, Vim, Lgals1, Ptma, Prdx1, H2afz, Clic1, Hsp90ab1
## PC_ 2
## Positive: Sidt1, Sell, Aff3, Rplp0, Klf3, Rps12, Bach2, Rpsa, Rps2, 1700025G04Rik
## Pde2a, Rps20, Cmah, Itgb1, Arhgef18, Lef1, Ccr7, Arhgap15, Nsg2, Itga4
## Gm26740, Txk, Elmo1, Nme2, Actn1, Slamf6, Fgf13, Foxp1, Il6ra, Rpl41
## Negative: Csf1, Gzmb, Chn2, S100a4, Gm36723, Cd52, Itga1, Cish, Rbpj, Osbpl3
## Hip1, AU020206, Sh3bgrl3, Gstm5, Fgl2, Ly6a, Itgae, Tjp1, Atp8a2, 2900026A02Rik
## Vav3, Capg, Inpp4b, AA467197, Mdfic, Havcr2, Coro2a, Rbms3, Ccr2, Cd226
## PC_ 3
## Positive: Plac8, Gimap7, Xcl1, Ifi27l2a, Ighm, Ifngas1, Btbd11, Tox, Eomes, Isg15
## Samd3, Dapl1, Chn2, A330040F15Rik, Ifit1, Rgs1, Prkch, Ddx60, Usp18, Atp8a2
## Setbp1, Oas1a, Rnf213, Ifit3, Abtb2, Rps20, Rtp4, Vav3, Cpq, St6galnac3
## Negative: Lgals3, Lgals1, S100a6, Vim, Ifitm1, Crip1, Ifitm2, Capg, S100a10, Ahnak
## Rap1gap2, S100a4, Itgb1, Dgkh, Cd48, Runx2, Nrp1, Iqgap2, Nebl, Itsn1
## Anxa1, Ccr2, Reep5, Emp3, Ifitm3, Ripor2, Rasa3, Klf12, Fry, Swap70
## PC_ 4
## Positive: Hmgb2, Chn2, Smc4, Cenpa, Esm1, Mcm6, Cbx5, Gzmk, Mcm2, Atp8a2
## Pde7a, Anp32e, Xcl1, Plac8, 1500009L16Rik, AU020206, Ifi27l2a, Csf1, Ckap5, Pip5k1b
## Cdca7, Gzmb, E2f1, Pde3b, Cdc20, Vav3, Isg15, Il2ra, Rgs1, Mndal
## Negative: Hist1h2af, Hist1h3c, Esco2, Hist1h1b, Pbk, Hist1h2bb, Neil3, Hist1h2ab, Hist1h2ae, Nusap1
## Aurkb, Hist1h2an, Mxd3, Hist1h2ap, Hist1h3b, Kif14, Hist1h2bm, Kif11, BC030867, Knl1
## Ncaph, Anln, Pclaf, Hist1h1d, Hist1h2bj, Hist1h3i, Mis18bp1, Melk, Ccnf, Hist1h2ak
## PC_ 5
## Positive: Isg15, Ifit1, Ifit3, Rsad2, Ifit3b, Rtp4, Usp18, Slfn5, Cmpk2, Rnf213
## Zbp1, Irf7, Trim30a, Oasl2, Xaf1, A330040F15Rik, Parp9, Phf11c, Stat1, Herc6
## Eif2ak2, Ifi206, Ifih1, Gbp6, A930037H05Rik, Bst2, Isg20, Ifi209, Trim30c, Oas3
## Negative: Inpp4b, Chn2, Prkch, Dusp2, Pip5k1b, Coro2a, Csf1, Vav3, Atp8a2, Tox
## Ttc3, St6galnac3, 2900026A02Rik, Rgs1, Adam19, Pde3b, Pde7a, Xcl1, Cish, Smyd3
## AU020206, Ssbp2, Znrf1, Zbtb20, 4933406I18Rik, Mmp16, Itga1, Btbd11, Itgae, Arsb
scobj_cc <- JackStraw(scobj_cc, dims=30, num.replicate=100)
scobj_cc <- ScoreJackStraw(scobj_cc, dims=1:30)
JackStrawPlot(scobj_cc, dims=1:30)
## Warning: Removed 56178 rows containing missing values (`geom_point()`).
ElbowPlot(scobj_cc, ndims=30)
scobj_cc <- FindNeighbors(scobj_cc, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
scobj_cc <- FindClusters(scobj_cc, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3770
## Number of edges: 147678
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8436
## Number of communities: 8
## Elapsed time: 0 seconds
scobj_cc <- RunUMAP(scobj_cc, dims=1:25)
## 13:52:19 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:52:19 Read 3770 rows and found 25 numeric columns
## 13:52:19 Using Annoy for neighbor search, n_neighbors = 30
## 13:52:19 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:52:20 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d89647aed1e
## 13:52:20 Searching Annoy index using 1 thread, search_k = 3000
## 13:52:21 Annoy recall = 100%
## 13:52:22 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:52:22 Initializing from normalized Laplacian + noise (using irlba)
## 13:52:23 Commencing optimization for 500 epochs, with 166362 positive edges
## 13:52:40 Optimization finished
DimPlot(scobj_cc, reduction="umap", pt.size=2.5, cols=custom_color_palette)
table(Idents(scobj_cc))
##
## 0 1 2 3 4 5 6 7
## 1365 1286 583 361 111 27 20 17
scobj_cc.markers <- FindAllMarkers(scobj_cc, only.pos=TRUE, min.pct=0.25,
logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
scobj_cc.markers %>%
group_by(cluster) %>%
top_n(n=10, wt=avg_log2FC) -> scobj_cc_top10
DoHeatmap(scobj_cc, features=scobj_cc_top10$gene,
group.colors=custom_color_palette) + NoLegend()
Cluster 7 contains B cells. Write the barcodes of cells in the cluster so they can be removed from the non-regressed dataset.
write.csv(CellsByIdentities(object=scobj_cc, idents=7), "B_cell_barcodes.csv")
Remove the cells from Subset 1 object for simpler re-analysis.
barcodes_to_remove_df = read.csv("B_cell_barcodes.csv")
barcodes_to_remove_df$X <- NULL
barcodes_to_remove = character(length(barcodes_to_remove_df$X7))
for(i in 1:length(barcodes_to_remove_df$X7)){
barcodes_to_remove[i] = as.character(barcodes_to_remove_df$X7[i])
}
sub1_scobj <- sub1_scobj[, !(sub1_scobj@assays$RNA@data@Dimnames[[2]]
%in% barcodes_to_remove)]
This section is a copy of the code from above.
Determine highly variables genes.
sub1_scobj <- FindVariableFeatures(sub1_scobj, selection.method="vst",
nfeatures=2500)
Rescale data.
sub1_all.genes <- rownames(sub1_scobj)
sub1_scobj <- ScaleData(sub1_scobj, features=sub1_all.genes)
## Centering and scaling data matrix
Re-run PCA.
sub1_scobj <- RunPCA(sub1_scobj, features=VariableFeatures(object=sub1_scobj))
## PC_ 1
## Positive: Pclaf, Birc5, Mki67, Nusap1, Stmn1, Hist1h1b, Kif11, Top2a, Tpx2, Cdca8
## Knl1, Neil3, Esco2, Ccna2, Hmmr, Spc24, Rrm2, Hist1h2af, Bub1, Cdk1
## Depdc1a, Aurkb, Cdca3, Hist1h3c, Shcbp1, Kif4, Ccnb2, Cks1b, Ube2c, Mxd3
## Negative: Ccl5, Ccnd3, Foxp1, Arhgap15, Gm26740, Bcl2, Lef1, Stat4, Sidt1, Gm42418
## Sell, Scml4, Ripor2, Prkcq, Peli1, Lncpint, Junb, Gm2682, Zbtb20, Pde7a
## Bach2, Thada, Satb1, Klf3, Themis, Utrn, Zswim6, Gab3, Sntb1, Mbnl1
## PC_ 2
## Positive: Arhgap15, Themis, Dock2, Dock10, Slc9a9, Gm2682, Mbnl1, Skap1, Utrn, Maml2
## Ankrd44, Atp8b4, Grap2, Elmo1, Atxn1, Ikzf1, Pitpnc1, Macf1, Prkcq, Foxp1
## Arhgap26, Tcf12, Ccnd3, Zbtb20, Lncpint, Ppm1h, Vps13b, Aak1, Pde7a, Zeb1
## Negative: Rpsa, Rplp0, Rpl41, Tmsb10, Tmsb4x, Rps2, Ppia, Rpl28, Rps20, Pfn1
## Actg1, Actb, Cd52, Nme2, Sh3bgrl3, S100a10, Rps12, Gapdh, Emp3, Hspa8
## Rbm3, Crip1, S100a6, Eif5a, Vim, Lgals1, Ptma, Prdx1, Clic1, H2afz
## PC_ 3
## Positive: Csf1, Gzmb, Id2, Chn2, Itga1, Gm36723, AU020206, Cish, Rbpj, S100a4
## Osbpl3, Hip1, Gstm5, Cd52, Sh3bgrl3, Fgl2, Atp8a2, Ly6a, Itgae, Vav3
## 2900026A02Rik, Inpp4b, Tjp1, Mdfic, Havcr2, Rbms3, Cd226, AA467197, Coro2a, Capg
## Negative: Sidt1, Aff3, Sell, Rplp0, Klf3, Rps12, Rps2, Rpsa, 1700025G04Rik, Bach2
## Rps20, Itgb1, Pde2a, Nme2, Arhgef18, Ccr7, Cmah, Nsg2, Lef1, Rpl41
## Elmo1, Actn1, Itga4, Txk, Arhgap15, Slamf6, Il6ra, Fgf13, Ndufa4, H2afz
## PC_ 4
## Positive: Lgals3, Lgals1, S100a6, Vim, Ifitm1, Ifitm2, Capg, Crip1, Ahnak, S100a10
## S100a4, Rap1gap2, Dgkh, Itgb1, Itsn1, Runx2, Nebl, Anxa1, Iqgap2, Cd48
## Nrp1, Ccr2, Ifitm3, Reep5, Emp3, Klf12, Fry, Ripor2, Rasa3, Swap70
## Negative: Plac8, Ifi27l2a, Xcl1, Gimap7, Ifngas1, Ighm, Btbd11, Tox, Eomes, Dapl1
## Isg15, Samd3, A330040F15Rik, Ifit1, Prkch, Chn2, Usp18, Ddx60, Rgs1, Setbp1
## Abtb2, Rps20, Oas1a, Rnf213, Ifit3, Atp8a2, Cpq, Rtp4, Ifit3b, Gzmk
## PC_ 5
## Positive: Dtl, E2f1, Cdca7, Lig1, Cdc6, Chaf1a, Hells, Mcm5, Chek1, Uhrf1
## Rad51b, Mcm3, Mcm6, Mcm2, Timeless, Mcm4, Mcm7, Pole, Atad5, Pola1
## Brca1, Dhfr, Clspn, Cdc45, Mms22l, Rad51, Hsp90ab1, Cenps, Ranbp1, Fignl1
## Negative: Ube2c, Cenpf, Ccnb1, Aspm, Ccnb2, Kif2c, Hmmr, Prr11, Cdkn3, Pif1
## Pimreg, Cenpa, Ckap2, Ccna2, Cdc20, Birc5, Cdca3, Arhgap19, Gas2l3, Cenpe
## Mxd3, Kif14, Plk1, Nusap1, Cep55, Depdc1a, Nuf2, Cdc25c, Kif23, Aurka
Re-run Jackstraw and Elbow plots to determine new dimensionality.
sub1_scobj <- JackStraw(sub1_scobj, dims=30, num.replicate=100)
sub1_scobj <- ScoreJackStraw(sub1_scobj, dims=1:30)
JackStrawPlot(sub1_scobj, dims=1:30)
## Warning: Removed 57324 rows containing missing values (`geom_point()`).
ElbowPlot(sub1_scobj, ndims=30)
Rerun clustering and UMAP algorithms. Dims based on Jackstraw and Elbow plots.
sub1_scobj <- FindNeighbors(sub1_scobj, dims=1:25)
## Computing nearest neighbor graph
## Computing SNN
sub1_scobj <- FindClusters(sub1_scobj, resolution=0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3753
## Number of edges: 147152
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8523
## Number of communities: 6
## Elapsed time: 0 seconds
sub1_scobj <- RunUMAP(sub1_scobj, dims=1:25)
## 13:56:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:56:53 Read 3753 rows and found 25 numeric columns
## 13:56:53 Using Annoy for neighbor search, n_neighbors = 30
## 13:56:53 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:56:54 Writing NN index file to temp file /tmp/RtmpnVd11M/file31d891d9391ae
## 13:56:54 Searching Annoy index using 1 thread, search_k = 3000
## 13:56:55 Annoy recall = 100%
## 13:56:56 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:56:56 Initializing from normalized Laplacian + noise (using irlba)
## 13:56:57 Commencing optimization for 500 epochs, with 166828 positive edges
## 13:57:14 Optimization finished
Make copy of object for readability.
proc_scobj <- sub1_scobj
Make an object that combines the Other T clusters (0, 1, 3, 5) into one cluster (6), excluding the cluster of cycling cells (4).
comb_scobj <- proc_scobj
comb_scobj_ids <- c("6", "6", "2", "6", "4", "6")
names(comb_scobj_ids) <- levels(comb_scobj)
comb_scobj <- RenameIdents(comb_scobj, comb_scobj_ids)
UMAP and number of cells per cluster.
DimPlot(proc_scobj, reduction="umap", pt.size=2.5, cols=custom_color_palette)
table(Idents(proc_scobj))
##
## 0 1 2 3 4 5
## 1291 1243 589 358 170 102
LN Trm module score.
proc_scobj <- AddModuleScore(object=proc_scobj, features=list(LN_Trm_genes),
ctrl=100, name="LN_Trm")
## Warning: The following features are not present in the object: Amica1, Aw112010,
## Hmha1, not searching for symbol synonyms
FeaturePlot(proc_scobj, features="LN_Trm1", pt.size=2.5,
cols=rev(brewer.pal(n=11, name="RdBu")))
TGFb module score.
proc_scobj <- AddModuleScore(object=proc_scobj, features=list(TGFb_genes),
ctrl=100, name="TGFb")
## Warning: The following features are not present in the object: Mcpt2, Vipr2,
## Gpr56, Cd33, Dlk1, Gsg2, Klhl30, 1810011H11Rik, Tfr2, Htra3, D8Ertd82e,
## Epb4.1l2, Fam101b, 9430020K01Rik, 1700049G17Rik, Sepn1, Prrt2, Zfp820,
## D3Ertd254e, Ccrn4l, Tmem57, Cd244, Tmem2, Fam46a, not searching for symbol
## synonyms
FeaturePlot(proc_scobj, features="TGFb1", pt.size=2.5,
cols=rev(brewer.pal(n=11, name="RdBu")))
Violin plots with the combined cluster object.
clusters_of_interest <- c("2", "6") # exclude cycling cells in 4
violin_colors <- c("#FFFFFF", "#7570B3")
VlnPlot(comb_scobj, features=c("Gzmb", "Gzmk", "Prf1"),
cols=violin_colors, idents=clusters_of_interest)
VlnPlot(comb_scobj, features=c("Csf1", "Ccl4", "Ccl5"),
cols=violin_colors, idents=clusters_of_interest)
VlnPlot(comb_scobj, features=c("Klf2", "S1pr1", "Tcf7"),
cols=violin_colors, idents=clusters_of_interest)
Feature plots that export individually.
feature_gene_list <- c("Ccr7", "Sell", "S1pr1", "Gzmb",
"Cxcr6", "Itgae", "Csf1")
for(i in 1:length(feature_gene_list)){
print(FeaturePlot(proc_scobj, features=feature_gene_list[i], pt.size=2.5))
}
Heatmap of top 10 genes per cluster.
proc_scobj.markers <- FindAllMarkers(proc_scobj, only.pos=TRUE, min.pct=0.25,
logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
proc_scobj.markers %>%
group_by(cluster) %>%
top_n(n=10, wt=avg_log2FC) -> proc_top10
DoHeatmap(proc_scobj, features=proc_top10$gene,
group.colors=custom_color_palette) + NoLegend()
Volcano plot of differentially expressed genes between Trm (cluster 2) and Other T (clusters 0, 1, 3, 5).
cluster2vT_vp.markers <- FindMarkers(proc_scobj,
ident.1=2,
ident.2=c(0, 1, 3, 5),
min.pct=0.25)
EnhancedVolcano(cluster2vT_vp.markers,
lab=rownames(cluster2vT_vp.markers),
x='avg_log2FC',
y='p_val_adj',
title="Trm vs Other T",
pCutoff=0.01,
FCcutoff=0.5,
pointSize=3.0,
labSize=5.0,
selectLab=NULL)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...